Observed data

Observed log-chlorophyll at representative station for the St. Lucie Estuary

library(tidyverse)
library(lubridate)
library(mgcv)  
library(plotly)
library(WRTDStidal)
library(gridExtra)
source('R/funcs.R')

# flow data, left moving window average of 30 days
data(sl_fldat)
fl_dat <- sl_fldat %>% 
  rename(date = Date) %>% 
  mutate(
    qsm = stats::filter(q, rep(1, 30)/30, sides = 1, method = 'convolution')
  )

# format the data to model
data(sl_dat)
sl_mod <- sl_dat %>%
  rename(date = Date) %>% 
  mutate(
    doy = yday(date), 
    dec_time = decimal_date(date), 
    yr = year(date),
    yr = factor(yr),
    mo = month(date, label = T)
  ) %>% 
  left_join(fl_dat, by = 'date') %>% 
  mutate(
    flo = log(qsm),
    chl = log(1 + chl)
    ) %>% 
  select(-q, -qsm, -sal)

# plot, all
p <- ggplot(sl_mod, aes(x = date, y = chl)) + 
  geom_line() +
  theme_bw() 
ggplotly(p)
# boxplot, by years
p <- ggplot(sl_mod, aes(x = yr, y = chl)) + 
  geom_boxplot() + 
  theme_bw()
ggplotly(p)
# boxplot, by month
p <- ggplot(sl_mod, aes(x = mo, y = chl)) + 
  geom_boxplot() + 
  theme_bw()
ggplotly(p)

Adding nitrogen and turbidity covariates

# formula for best annual, seasonal, flow model
strt <- best2$formula %>% 
  as.character

smths <- c(
  "s(nh, bs = 'tp')",  
  "s(tss, bs = 'tp')",
  "te(nh, tss, bs = c('tp', 'tp'))"
)

# get all combinations of smoothers to model, one to many
frmstab <- list()
frms <- list()
for(i in seq_along(smths)){
  
  # for the summary table  
  frmtab <- combn(smths, i) %>%
    apply(2, function(x){
      paste(x, collapse = ' + ')
      })
  
  # for the model
  frm <- sapply(frmtab, function(x){  
        paste('chl ~', strt[3], '+', x) %>% 
          formula
        })
  
  frmstab <- c(frmstab, frmtab)
  frms <- c(frms, frm)
 
}

# create models from smooth formula combinations
mods3 <- map(frms, function(frm){
  
  gam(frm, 
    knots = list(doy = c(1, 366)),
    data = sl_mod, 
    na.action = na.exclude
  )

})
names(mods3) <- paste0('mod', seq_along(mods3))

Summary of all nutrient, turbidity models

# smoother stats of GAMs
map(mods3, ~ summary(.x)$s.table %>% data.frame %>% rownames_to_column('smoother')) %>% 
  enframe %>% 
  unnest %>% 
  kable(digits  = 2)
name smoother edf Ref.df F p.value
mod1 s(dec_time) 7.15 8.14 2.28 0.02
mod1 s(doy) 4.43 8.00 4.71 0.00
mod1 s(flo) 3.55 4.35 2.76 0.03
mod1 te(flo,doy) 0.80 15.00 0.07 0.08
mod1 te(flo,dec_time) 2.07 16.00 0.21 0.05
mod1 te(dec_time,doy) 0.00 15.00 0.00 0.66
mod1 te(dec_time,doy,flo) 7.50 48.00 0.30 0.00
mod1 s(nh) 3.11 3.79 8.77 0.00
mod2 s(dec_time) 7.36 8.30 1.77 0.08
mod2 s(doy) 3.73 8.00 2.14 0.00
mod2 s(flo) 6.46 7.63 1.31 0.19
mod2 te(flo,doy) 0.20 15.00 0.01 0.18
mod2 te(flo,dec_time) 3.54 16.00 0.67 0.00
mod2 te(dec_time,doy) 0.00 15.00 0.00 0.62
mod2 te(dec_time,doy,flo) 9.43 48.00 0.38 0.00
mod2 s(tss) 4.01 4.92 3.55 0.00
mod3 s(dec_time) 8.04 8.73 2.74 0.01
mod3 s(doy) 4.41 8.00 5.71 0.00
mod3 s(flo) 1.00 1.00 5.88 0.02
mod3 te(flo,doy) 0.72 15.00 0.10 0.01
mod3 te(flo,dec_time) 3.87 16.00 1.28 0.00
mod3 te(dec_time,doy) 0.00 15.00 0.00 0.59
mod3 te(dec_time,doy,flo) 4.43 48.00 0.15 0.02
mod3 te(nh,tss) 10.99 12.32 5.54 0.00
mod4 s(dec_time) 7.87 8.63 2.28 0.02
mod4 s(doy) 4.33 8.00 5.69 0.00
mod4 s(flo) 4.66 5.69 2.82 0.01
mod4 te(flo,doy) 0.03 15.00 0.00 0.10
mod4 te(flo,dec_time) 2.70 16.00 0.36 0.01
mod4 te(dec_time,doy) 0.00 15.00 0.00 0.62
mod4 te(dec_time,doy,flo) 6.09 48.00 0.23 0.01
mod4 s(nh) 3.36 4.07 9.03 0.00
mod4 s(tss) 4.52 5.49 3.83 0.00
mod5 s(dec_time) 8.05 8.73 2.78 0.00
mod5 s(doy) 4.40 8.00 5.36 0.00
mod5 s(flo) 1.00 1.00 5.66 0.02
mod5 te(flo,doy) 0.98 15.00 0.12 0.02
mod5 te(flo,dec_time) 3.94 16.00 1.21 0.00
mod5 te(dec_time,doy) 0.00 15.00 0.00 0.60
mod5 te(dec_time,doy,flo) 4.37 48.00 0.15 0.02
mod5 s(nh) 3.62 4.51 5.90 0.00
mod5 te(nh,tss) 9.82 20.00 1.52 0.00
mod6 s(dec_time) 7.86 8.63 2.48 0.01
mod6 s(doy) 4.24 8.00 5.00 0.00
mod6 s(flo) 3.31 4.07 3.56 0.01
mod6 te(flo,doy) 0.23 15.00 0.03 0.07
mod6 te(flo,dec_time) 2.50 16.00 0.33 0.01
mod6 te(dec_time,doy) 0.00 15.00 0.00 0.62
mod6 te(dec_time,doy,flo) 6.31 48.00 0.24 0.01
mod6 s(tss) 4.60 5.58 3.60 0.00
mod6 te(nh,tss) 2.25 14.00 2.78 0.00
mod7 s(dec_time) 7.94 8.67 2.52 0.01
mod7 s(doy) 4.34 8.00 4.73 0.00
mod7 s(flo) 3.13 3.84 3.49 0.01
mod7 te(flo,doy) 0.36 15.00 0.05 0.08
mod7 te(flo,dec_time) 2.54 16.00 0.34 0.01
mod7 te(dec_time,doy) 0.00 15.00 0.00 0.64
mod7 te(dec_time,doy,flo) 5.96 48.00 0.23 0.01
mod7 s(nh) 1.00 1.00 0.01 0.93
mod7 s(tss) 4.42 5.37 3.78 0.00
mod7 te(nh,tss) 2.17 15.00 1.72 0.00
# summary stats of GAMs
map(mods3, ~ data.frame(
    AIC = AIC(.x), 
    R2 = summary(.x)$r.sq)) %>% 
  enframe %>% 
  unnest %>% 
  mutate(smth_added = frmstab) %>% 
  select(name, smth_added, everything()) %>% 
  kable(digits  = 2)
name smth_added AIC R2
mod1 s(nh, bs = ‘tp’) 497.81 0.37
mod2 s(tss, bs = ‘tp’) 534.20 0.35
mod3 te(nh, tss, bs = c(‘tp’, ‘tp’)) 481.66 0.41
mod4 s(nh, bs = ‘tp’) + s(tss, bs = ‘tp’) 481.38 0.41
mod5 s(nh, bs = ‘tp’) + te(nh, tss, bs = c(‘tp’, ‘tp’)) 481.83 0.41
mod6 s(tss, bs = ‘tp’) + te(nh, tss, bs = c(‘tp’, ‘tp’)) 478.98 0.41
mod7 s(nh, bs = ‘tp’) + s(tss, bs = ‘tp’) + te(nh, tss, bs = c(‘tp’, ‘tp’)) 479.77 0.41

Summary stats of best three three models:

# best model with season, year, flow
best3 <- map(mods3, ~ summary(.x)$r.sq) %>% 
  unlist %>% 
  which.max %>% 
  mods3[[.]] 

best <- list(best1 = best1, best2 = best2, best3 = best3)

# smoother stats of GAMs
map(best, ~ summary(.x)$s.table %>% data.frame %>% rownames_to_column('smoother')) %>% 
  enframe %>% 
  unnest %>% 
  kable(digits  = 2)
name smoother edf Ref.df F p.value
best1 s(dec_time) 1.00 1.00 10.64 0.00
best1 s(doy) 4.16 8.00 5.16 0.00
best1 te(dec_time,doy) 2.85 15.00 0.28 0.16
best2 s(dec_time) 1.00 1.00 1.93 0.17
best2 s(doy) 2.94 8.00 0.71 0.05
best2 s(flo) 6.78 7.86 2.96 0.00
best2 te(flo,doy) 2.96 12.00 0.58 0.01
best2 te(flo,dec_time) 0.21 15.00 0.02 0.09
best2 te(dec_time,doy) 0.00 15.00 0.00 0.81
best2 te(dec_time,doy,flo) 19.86 48.00 1.01 0.00
best3 s(dec_time) 8.05 8.73 2.78 0.00
best3 s(doy) 4.40 8.00 5.36 0.00
best3 s(flo) 1.00 1.00 5.66 0.02
best3 te(flo,doy) 0.98 15.00 0.12 0.02
best3 te(flo,dec_time) 3.94 16.00 1.21 0.00
best3 te(dec_time,doy) 0.00 15.00 0.00 0.60
best3 te(dec_time,doy,flo) 4.37 48.00 0.15 0.02
best3 s(nh) 3.62 4.51 5.90 0.00
best3 te(nh,tss) 9.82 20.00 1.52 0.00
# summary stats of GAMs
map(best, ~ data.frame(
    AIC = AIC(.x), 
    R2 = summary(.x)$r.sq)) %>% 
  enframe %>% 
  unnest %>% 
  kable(digits  = 2)
name AIC R2
best1 608.68 0.17
best2 556.83 0.34
best3 481.83 0.41
# predictions
sl_res3 <- map(best, function(x){
  sl_mod %>% 
    mutate(
      pred = predict(x)
    )
  }) %>% 
  enframe('mods') %>% 
  unnest

# plot
p <- ggplot(sl_res3, aes(x = date)) + 
  geom_point(data = sl_mod, aes(y = chl), size = 0.5) + 
  geom_line(aes(y = pred, colour = mods)) + 
  theme_bw() + 
  theme(
    legend.position = 'top', 
    legend.title = element_blank()
    )
ggplotly(p)